1   package org.apache.lucene.facet;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collections;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Set;
29  
30  import org.apache.lucene.analysis.MockAnalyzer;
31  import org.apache.lucene.document.Document;
32  import org.apache.lucene.document.Field;
33  import org.apache.lucene.document.SortedDocValuesField;
34  import org.apache.lucene.document.StringField;
35  import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
36  import org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState;
37  import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
38  import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
39  import org.apache.lucene.facet.taxonomy.TaxonomyReader;
40  import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
41  import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
42  import org.apache.lucene.index.IndexReader;
43  import org.apache.lucene.index.IndexWriterConfig;
44  import org.apache.lucene.index.LeafReaderContext;
45  import org.apache.lucene.index.RandomIndexWriter;
46  import org.apache.lucene.index.Term;
47  import org.apache.lucene.search.BooleanClause;
48  import org.apache.lucene.search.BooleanClause.Occur;
49  import org.apache.lucene.search.BooleanQuery;
50  import org.apache.lucene.search.FilteredQuery;
51  import org.apache.lucene.search.IndexSearcher;
52  import org.apache.lucene.search.MatchAllDocsQuery;
53  import org.apache.lucene.search.Query;
54  import org.apache.lucene.search.RandomAccessWeight;
55  import org.apache.lucene.search.ScoreDoc;
56  import org.apache.lucene.search.SimpleCollector;
57  import org.apache.lucene.search.Sort;
58  import org.apache.lucene.search.SortField;
59  import org.apache.lucene.search.TermQuery;
60  import org.apache.lucene.search.TopDocs;
61  import org.apache.lucene.search.Weight;
62  import org.apache.lucene.store.Directory;
63  import org.apache.lucene.util.Bits;
64  import org.apache.lucene.util.BytesRef;
65  import org.apache.lucene.util.IOUtils;
66  import org.apache.lucene.util.InPlaceMergeSorter;
67  import org.apache.lucene.util.InfoStream;
68  import org.apache.lucene.util.TestUtil;
69  
70  public class TestDrillSideways extends FacetTestCase {
71  
  /**
   * Exercises drill-sideways on a small hand-built index with two dimensions:
   * a flat "Author" dim and a hierarchical "Publish Date" dim.  Covers single-value
   * drill-down, OR'd drill-down values, drill-down on two dims at once, drill-down
   * on an unknown dim/term, and a base query that matches nothing.
   */
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("Publish Date", true);

    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    writer.addDocument(config.build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    DrillSideways ds = new DrillSideways(searcher, config, taxoReader);

    // Simple case: drill-down on a single field; in this
    // case the drill-sideways + drill-down counts ==
    // drill-down of just the query:
    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    DrillSidewaysResult r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());

    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // Same simple case, but no baseQuery (pure browse):
    // drill-down on a single field; in this case the
    // drill-sideways + drill-down counts == drill-down of
    // just the query:
    // NOTE(review): this invocation is identical to the previous one (the
    // baseQuery was already null there) — confirm whether a non-null
    // baseQuery was intended in the first case.
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);

    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());

    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // Another simple case: drill-down on single fields
    // but OR of two values
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(3, r.hits.totalHits);
    // Publish Date is only drill-down: Lisa and Bob
    // (drill-down) published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // The combined facets come back as a MultiFacets wrapping the
    // drill-sideways dims over the drill-down counts:
    assertTrue(r.facets instanceof MultiFacets);
    List<FacetResult> allResults = r.facets.getAllDims(10);
    assertEquals(2, allResults.size());
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", allResults.get(0).toString());
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", allResults.get(1).toString());

    // More interesting case: drill-down on two fields
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    r = ds.search(null, ddq, 10);
    assertEquals(1, r.hits.totalHits);
    // Publish Date is drill-sideways + drill-down: Lisa
    // (drill-down) published once in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // Even more interesting case: drill down on two fields,
    // but one of them is OR
    ddq = new DrillDownQuery(config);

    // Drill down on Lisa or Bob:
    ddq.add("Author", "Lisa");
    ddq.add("Publish Date", "2010");
    ddq.add("Author", "Bob");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is both drill-sideways + drill-down:
    // Lisa or Bob published twice in 2010 and once in 2012:
    assertEquals("dim=Publish Date path=[] value=3 childCount=2\n  2010 (2)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // Test drilling down on invalid field: no hits, and no
    // facet results for any dim:
    ddq = new DrillDownQuery(config);
    ddq.add("Foobar", "Baz");
    r = ds.search(null, ddq, 10);
    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Foobar"));

    // Test drilling down on valid term or'd with invalid term:
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n  Susan (1)\n  Frank (1)\n", r.facets.getTopChildren(10, "Author").toString());

    // LUCENE-4915: test drilling down on a dimension but
    // NOT facet counting it:
    // NOTE(review): this query is identical to the previous one; the
    // "not counting" aspect is only visible in the asserts (no Author
    // check) — confirm this still covers the original LUCENE-4915 case.
    ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    ddq.add("Author", "Tom");
    r = ds.search(null, ddq, 10);
    assertEquals(2, r.hits.totalHits);
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim=Publish Date path=[] value=2 childCount=2\n  2010 (1)\n  2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());

    // Test main query gets null scorer: base query matches no docs, so
    // there are no hits and no facet results:
    ddq = new DrillDownQuery(config, new TermQuery(new Term("foobar", "baz")));
    ddq.add("Author", "Lisa");
    r = ds.search(null, ddq, 10);

    assertEquals(0, r.hits.totalHits);
    assertNull(r.facets.getTopChildren(10, "Publish Date"));
    assertNull(r.facets.getTopChildren(10, "Author"));
    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
  }
249 
  /**
   * Verifies drill-sideways when one segment has no postings for the
   * drill-down dimension: the second segment uses "Foobar" instead of
   * "Author", so the Author drill-down term is absent there.
   */
  public void testSometimesInvalidDrillDown() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("Publish Date", true);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    writer.addDocument(config.build(taxoWriter, doc));

    // Force a segment boundary so the next doc lands in a new segment:
    writer.commit();

    // 2nd segment has no Author:
    doc = new Document();
    doc.add(new FacetField("Foobar", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    writer.addDocument(config.build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("Author", "Lisa");
    DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10);

    assertEquals(1, r.hits.totalHits);
    // Publish Date is only drill-down; only Lisa's 2010 doc matches the
    // Author drill-down (the 2012 doc uses dim "Foobar", not "Author"):
    assertEquals("dim=Publish Date path=[] value=1 childCount=1\n  2010 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString());
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published once, and Bob
    // published once:
    assertEquals("dim=Author path=[] value=2 childCount=2\n  Bob (1)\n  Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString());

    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
  }
304 
  /**
   * Verifies that a single drill-sideways search can answer facet requests
   * at two levels of the same hierarchical dim: top-level children of "dim"
   * and children of the drilled-down path "dim/a".
   */
  public void testMultipleRequestsPerDim() throws Exception {
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // Writes facet ords to a separate directory from the
    // main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.setHierarchical("dim", true);

    Document doc = new Document();
    doc.add(new FacetField("dim", "a", "x"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("dim", "a", "y"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("dim", "a", "z"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("dim", "b"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("dim", "c"));
    writer.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("dim", "d"));
    writer.addDocument(config.build(taxoWriter, doc));

    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    DrillDownQuery ddq = new DrillDownQuery(config);
    ddq.add("dim", "a");
    DrillSidewaysResult r = new DrillSideways(searcher, config, taxoReader).search(null, ddq, 10);

    // Three docs have dim=a; sideways counts still see b/c/d at the top
    // level, and the drill-down path a has children x/y/z:
    assertEquals(3, r.hits.totalHits);
    assertEquals("dim=dim path=[] value=6 childCount=4\n  a (3)\n  b (1)\n  c (1)\n  d (1)\n", r.facets.getTopChildren(10, "dim").toString());
    assertEquals("dim=dim path=[a] value=3 childCount=3\n  x (1)\n  y (1)\n  z (1)\n", r.facets.getTopChildren(10, "dim", "a").toString());

    writer.close();
    IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
  }
360 
361   private static class Doc implements Comparable<Doc> {
362     String id;
363     String contentToken;
364 
365     public Doc() {}
366     
367     // -1 if the doc is missing this dim, else the index
368     // -into the values for this dim:
369     int[] dims;
370 
371     // 2nd value per dim for the doc (so we test
372     // multi-valued fields):
373     int[] dims2;
374     boolean deleted;
375 
376     @Override
377     public int compareTo(Doc other) {
378       return id.compareTo(other.id);
379     }
380   }
381 
  // Relative probabilities of emitting content token "a", "b" or "c" for
  // indexed documents; initialized and normalized to sum to 1.0 in testRandom:
  private double aChance, bChance, cChance;
383 
384   private String randomContentToken(boolean isQuery) {
385     double d = random().nextDouble();
386     if (isQuery) {
387       if (d < 0.33) {
388         return "a";
389       } else if (d < 0.66) {
390         return "b";
391       } else {
392         return "c";
393       }
394     } else {
395       if (d <= aChance) {
396         return "a";
397       } else if (d < aChance + bChance) {
398         return "b";
399       } else {
400         return "c";
401       }
402     }
403   }
404 
  /**
   * Randomized end-to-end test: builds an index with several random facet
   * dims (backed either by the taxonomy index or by SortedSet doc values),
   * runs random drill-down/drill-sideways queries (optionally with a random
   * filter and deletions), and verifies hits and facet counts against the
   * slow reference implementation in slowDrillSidewaysSearch.
   */
  public void testRandom() throws Exception {

    // Pick non-zero token probabilities, then normalize them to sum to 1:
    while (aChance == 0.0) {
      aChance = random().nextDouble();
    }
    while (bChance == 0.0) {
      bChance = random().nextDouble();
    }
    while (cChance == 0.0) {
      cChance = random().nextDouble();
    }
    //aChance = .01;
    //bChance = 0.5;
    //cChance = 1.0;
    double sum = aChance + bChance + cChance;
    aChance /= sum;
    bChance /= sum;
    cChance /= sum;

    int numDims = TestUtil.nextInt(random(), 2, 5);
    //int numDims = 3;
    int numDocs = atLeast(3000);
    //int numDocs = 20;
    if (VERBOSE) {
      System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
    }
    String[][] dimValues = new String[numDims][];
    // Each successive dim gets twice as many distinct values (2, 4, 8, ...):
    int valueCount = 2;

    for(int dim=0;dim<numDims;dim++) {
      Set<String> values = new HashSet<>();
      while (values.size() < valueCount) {
        String s = TestUtil.randomRealisticUnicodeString(random());
        //String s = _TestUtil.randomString(random());
        if (s.length() > 0) {
          values.add(s);
        }
      } 
      dimValues[dim] = values.toArray(new String[values.size()]);
      valueCount *= 2;
    }

    // Build the in-memory reference documents:
    List<Doc> docs = new ArrayList<>();
    for(int i=0;i<numDocs;i++) {
      Doc doc = new Doc();
      doc.id = ""+i;
      doc.contentToken = randomContentToken(false);
      doc.dims = new int[numDims];
      doc.dims2 = new int[numDims];
      for(int dim=0;dim<numDims;dim++) {
        if (random().nextInt(5) == 3) {
          // This doc is missing this dim:
          doc.dims[dim] = -1;
        } else if (dimValues[dim].length <= 4) {
          // Small value spaces get a geometric skew toward low ordinals:
          int dimUpto = 0;
          doc.dims[dim] = dimValues[dim].length-1;
          while (dimUpto < dimValues[dim].length) {
            if (random().nextBoolean()) {
              doc.dims[dim] = dimUpto;
              break;
            }
            dimUpto++;
          }
        } else {
          doc.dims[dim] = random().nextInt(dimValues[dim].length);
        }

        if (random().nextInt(5) == 3) {
          // 2nd value:
          doc.dims2[dim] = random().nextInt(dimValues[dim].length);
        } else {
          doc.dims2[dim] = -1;
        }
      }
      docs.add(doc);
    }

    Directory d = newDirectory();
    Directory td = newDirectory();

    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setInfoStream(InfoStream.NO_OUTPUT);
    RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    for(int i=0;i<numDims;i++) {
      config.setMultiValued("dim"+i, true);
    }

    // Randomly choose between taxonomy-based and SortedSet-DV-based facets:
    boolean doUseDV = random().nextBoolean();

    for(Doc rawDoc : docs) {
      Document doc = new Document();
      doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
      doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
      doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));

      if (VERBOSE) {
        System.out.println("  doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
      }
      for(int dim=0;dim<numDims;dim++) {
        int dimValue = rawDoc.dims[dim];
        if (dimValue != -1) {
          if (doUseDV) {
            doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
          } else {
            doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
          }
          doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
          if (VERBOSE) {
            System.out.println("    dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
          }
        }
        int dimValue2 = rawDoc.dims2[dim];
        if (dimValue2 != -1) {
          if (doUseDV) {
            doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
          } else {
            doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
          }
          doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
          if (VERBOSE) {
            System.out.println("      dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
          }
        }
      }

      w.addDocument(config.build(tw, doc));
    }

    if (random().nextBoolean()) {
      // Randomly delete a few docs:
      int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
      if (VERBOSE) {
        System.out.println("delete " + numDel);
      }
      int delCount = 0;
      while (delCount < numDel) {
        Doc doc = docs.get(random().nextInt(docs.size()));
        if (!doc.deleted) {
          if (VERBOSE) {
            System.out.println("  delete id=" + doc.id);
          }
          doc.deleted = true;
          w.deleteDocuments(new Term("id", doc.id));
          delCount++;
        }
      }
    }

    if (random().nextBoolean()) {
      if (VERBOSE) {
        System.out.println("TEST: forceMerge(1)...");
      }
      w.forceMerge(1);
    }
    IndexReader r = w.getReader();

    final SortedSetDocValuesReaderState sortedSetDVState;
    IndexSearcher s = newSearcher(r);
    
    if (doUseDV) {
      sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
    } else {
      sortedSetDVState = null;
    }

    if (VERBOSE) {
      System.out.println("r.numDocs() = " + r.numDocs());
    }

    // NRT open
    TaxonomyReader tr = new DirectoryTaxonomyReader(tw);

    int numIters = atLeast(10);

    for(int iter=0;iter<numIters;iter++) {

      // Occasionally use a MatchAllDocsQuery base (null contentToken):
      String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
      int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
      }

      String[][] drillDowns = new String[numDims][];

      // Pick numDrillDown distinct dims; each gets either one value or an
      // OR of up to 5 distinct values:
      int count = 0;
      boolean anyMultiValuedDrillDowns = false;
      while (count < numDrillDown) {
        int dim = random().nextInt(numDims);
        if (drillDowns[dim] == null) {
          if (random().nextBoolean()) {
            // Drill down on one value:
            drillDowns[dim] = new String[] {dimValues[dim][random().nextInt(dimValues[dim].length)]};
          } else {
            int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
            drillDowns[dim] = new String[orCount];
            anyMultiValuedDrillDowns |= orCount > 1;
            for(int i=0;i<orCount;i++) {
              // Retry until we draw a value not already chosen for this dim:
              while (true) {
                String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
                for(int j=0;j<i;j++) {
                  if (value.equals(drillDowns[dim][j])) {
                    value = null;
                    break;
                  }
                }
                if (value != null) {
                  drillDowns[dim][i] = value;
                  break;
                }
              }
            }
          }
          if (VERBOSE) {
            BytesRef[] values = new BytesRef[drillDowns[dim].length];
            for(int i=0;i<values.length;i++) {
              values[i] = new BytesRef(drillDowns[dim][i]);
            }
            System.out.println("  dim" + dim + "=" + Arrays.toString(values));
          }
          count++;
        }
      }

      Query baseQuery;
      if (contentToken == null) {
        baseQuery = new MatchAllDocsQuery();
      } else {
        baseQuery = new TermQuery(new Term("content", contentToken));
      }

      DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);

      for(int dim=0;dim<numDims;dim++) {
        if (drillDowns[dim] != null) {
          for(String value : drillDowns[dim]) {
            ddq.add("dim" + dim, value);
          }
        }
      }

      // Occasionally add a random-access filter matching only even ids:
      Query filter;
      if (random().nextInt(7) == 6) {
        if (VERBOSE) {
          System.out.println("  only-even filter");
        }
        filter = new Query() {

          @Override
          public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
            return new RandomAccessWeight(this) {
              @Override
              protected Bits getMatchingDocs(final LeafReaderContext context) throws IOException {
                return new Bits() {

                  @Override
                  public boolean get(int docID) {
                    try {
                      // Accept only docs whose stored id is even:
                      return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
                    } catch (NumberFormatException | IOException e) {
                      throw new RuntimeException(e);
                    }
                  }

                  @Override
                  public int length() {
                    return context.reader().maxDoc();
                  }
                  
                };
              }
            };
          }

          @Override
          public String toString(String field) {
            return "drillSidewaysTestFilter";
          }

        };
      } else {
        filter = null;
      }

      // Verify docs are always collected in order.  If we
      // had an AssertingScorer it could catch it when
      // Weight.scoresDocsOutOfOrder lies!:
      new DrillSideways(s, config, tr).search(ddq,
                           new SimpleCollector() {
                             int lastDocID;

                             @Override
                             public void collect(int doc) {
                               assert doc > lastDocID;
                               lastDocID = doc;
                             }

                             @Override
                             protected void doSetNextReader(LeafReaderContext context) throws IOException {
                               lastDocID = -1;
                             }

                            @Override
                            public boolean needsScores() {
                              return false;
                            }
                           });

      // Also separately verify that DS respects the
      // scoreSubDocsAtOnce method, to ensure that all
      // subScorers are on the same docID:
      if (!anyMultiValuedDrillDowns) {
        // Can only do this test when there are no OR'd
        // drill-down values, because in that case it's
        // easily possible for one of the DD terms to be on
        // a future docID:
        new DrillSideways(s, config, tr) {
          @Override
          protected boolean scoreSubDocsAtOnce() {
            return true;
          }
        }.search(ddq, new AssertingSubDocsAtOnceCollector());
      }

      // Reference result computed without DrillSideways:
      TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);

      Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
      DrillSideways ds;
      if (doUseDV) {
        ds = new DrillSideways(s, config, sortedSetDVState);
      } else {
        // Taxonomy-based DS; override buildFacetsResult so counts are
        // computed via getTaxonomyFacetCounts:
        ds = new DrillSideways(s, config, tr) {
            @Override
            protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
              Map<String,Facets> drillSidewaysFacets = new HashMap<>();
              Facets drillDownFacets = getTaxonomyFacetCounts(taxoReader, config, drillDowns);
              if (drillSideways != null) {
                for(int i=0;i<drillSideways.length;i++) {
                  drillSidewaysFacets.put(drillSidewaysDims[i],
                                          getTaxonomyFacetCounts(taxoReader, config, drillSideways[i]));
                }
              }

              if (drillSidewaysFacets.isEmpty()) {
                return drillDownFacets;
              } else {
                return new MultiFacets(drillSidewaysFacets, drillDownFacets);
              }

            }
          };
      }

      // Retrieve all facets:
      DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);

      // Record per-id scores from the base query so we can later verify
      // the drill-down does not change scoring:
      TopDocs hits = s.search(baseQuery, numDocs);
      Map<String,Float> scores = new HashMap<>();
      for(ScoreDoc sd : hits.scoreDocs) {
        scores.put(s.doc(sd.doc).get("id"), sd.score);
      }
      if (VERBOSE) {
        System.out.println("  verify all facets");
      }
      verifyEquals(dimValues, s, expected, actual, scores, doUseDV);

      // Make sure drill down doesn't change score:
      Query q = ddq;
      if (filter != null) {
        q = new BooleanQuery.Builder()
            .add(q, Occur.MUST)
            .add(filter, Occur.FILTER)
            .build();
      }
      TopDocs ddqHits = s.search(q, numDocs);
      assertEquals(expected.hits.size(), ddqHits.totalHits);
      for(int i=0;i<expected.hits.size();i++) {
        // Score should be IDENTICAL:
        assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
      }
    }

    w.close();
    IOUtils.close(r, tr, tw, d, td);
  }
791 
792   private static class Counters {
793     int[][] counts;
794 
795     public Counters(String[][] dimValues) {
796       counts = new int[dimValues.length][];
797       for(int dim=0;dim<dimValues.length;dim++) {
798         counts[dim] = new int[dimValues[dim].length];
799       }
800     }
801 
802     public void inc(int[] dims, int[] dims2) {
803       inc(dims, dims2, -1);
804     }
805 
806     public void inc(int[] dims, int[] dims2, int onlyDim) {
807       assert dims.length == counts.length;
808       assert dims2.length == counts.length;
809       for(int dim=0;dim<dims.length;dim++) {
810         if (onlyDim == -1 || dim == onlyDim) {
811           if (dims[dim] != -1) {
812             counts[dim][dims[dim]]++;
813           }
814           if (dims2[dim] != -1 && dims2[dim] != dims[dim]) {
815             counts[dim][dims2[dim]]++;
816           }
817         }
818       }
819     }
820   }
821 
  /** Holds the expected ("gold") results computed by {@link #slowDrillSidewaysSearch}. */
  private static class TestFacetResult {
    // Matching docs, in sorted order
    List<Doc> hits;
    // counts[dim][valueOrd]: expected facet count for each dimension/value
    int[][] counts;
    // Per dimension: how many values have a non-zero count
    int[] uniqueCounts;
    public TestFacetResult() {}
  }
828   
829   private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
830     final int[] ids = new int[counts.length];
831     for(int i=0;i<ids.length;i++) {
832       ids[i] = i;
833     }
834 
835     // Naive (on purpose, to reduce bug in tester/gold):
836     // sort all ids, then return top N slice:
837     new InPlaceMergeSorter() {
838 
839       @Override
840       protected void swap(int i, int j) {
841         int id = ids[i];
842         ids[i] = ids[j];
843         ids[j] = id;
844       }
845 
846       @Override
847       protected int compare(int i, int j) {
848         int counti = counts[ids[i]];
849         int countj = counts[ids[j]];
850         // Sort by count descending...
851         if (counti > countj) {
852           return -1;
853         } else if (counti < countj) {
854           return 1;
855         } else {
856           // ... then by label ascending:
857           return new BytesRef(values[ids[i]]).compareTo(new BytesRef(values[ids[j]]));
858         }
859       }
860 
861     }.sort(0, ids.length);
862 
863     if (topN > ids.length) {
864       topN = ids.length;
865     }
866 
867     int numSet = topN;
868     for(int i=0;i<topN;i++) {
869       if (counts[ids[i]] == 0) {
870         numSet = i;
871         break;
872       }
873     }
874 
875     int[] topNIDs = new int[numSet];
876     System.arraycopy(ids, 0, topNIDs, 0, topNIDs.length);
877     return topNIDs;
878   }
879 
880   private TestFacetResult slowDrillSidewaysSearch(IndexSearcher s, List<Doc> docs,
881                                                         String contentToken, String[][] drillDowns,
882                                                         String[][] dimValues, Query onlyEven) throws Exception {
883     int numDims = dimValues.length;
884 
885     List<Doc> hits = new ArrayList<>();
886     Counters drillDownCounts = new Counters(dimValues);
887     Counters[] drillSidewaysCounts = new Counters[dimValues.length];
888     for(int dim=0;dim<numDims;dim++) {
889       drillSidewaysCounts[dim] = new Counters(dimValues);
890     }
891 
892     if (VERBOSE) {
893       System.out.println("  compute expected");
894     }
895 
896     nextDoc: for(Doc doc : docs) {
897       if (doc.deleted) {
898         continue;
899       }
900       if (onlyEven != null & (Integer.parseInt(doc.id) & 1) != 0) {
901         continue;
902       }
903       if (contentToken == null || doc.contentToken.equals(contentToken)) {
904         int failDim = -1;
905         for(int dim=0;dim<numDims;dim++) {
906           if (drillDowns[dim] != null) {
907             String docValue = doc.dims[dim] == -1 ? null : dimValues[dim][doc.dims[dim]];
908             String docValue2 = doc.dims2[dim] == -1 ? null : dimValues[dim][doc.dims2[dim]];
909             boolean matches = false;
910             for(String value : drillDowns[dim]) {
911               if (value.equals(docValue) || value.equals(docValue2)) {
912                 matches = true;
913                 break;
914               }
915             }
916             if (!matches) {
917               if (failDim == -1) {
918                 // Doc could be a near-miss, if no other dim fails
919                 failDim = dim;
920               } else {
921                 // Doc isn't a hit nor a near-miss
922                 continue nextDoc;
923               }
924             }
925           }
926         }
927 
928         if (failDim == -1) {
929           if (VERBOSE) {
930             System.out.println("    exp: id=" + doc.id + " is a hit");
931           }
932           // Hit:
933           hits.add(doc);
934           drillDownCounts.inc(doc.dims, doc.dims2);
935           for(int dim=0;dim<dimValues.length;dim++) {
936             drillSidewaysCounts[dim].inc(doc.dims, doc.dims2);
937           }
938         } else {
939           if (VERBOSE) {
940             System.out.println("    exp: id=" + doc.id + " is a near-miss on dim=" + failDim);
941           }
942           drillSidewaysCounts[failDim].inc(doc.dims, doc.dims2, failDim);
943         }
944       }
945     }
946 
947     Map<String,Integer> idToDocID = new HashMap<>();
948     for(int i=0;i<s.getIndexReader().maxDoc();i++) {
949       idToDocID.put(s.doc(i).get("id"), i);
950     }
951 
952     Collections.sort(hits);
953 
954     TestFacetResult res = new TestFacetResult();
955     res.hits = hits;
956     res.counts = new int[numDims][];
957     res.uniqueCounts = new int[numDims];
958     for (int dim = 0; dim < numDims; dim++) {
959       if (drillDowns[dim] != null) {
960         res.counts[dim] = drillSidewaysCounts[dim].counts[dim];
961       } else {
962         res.counts[dim] = drillDownCounts.counts[dim];
963       }
964       int uniqueCount = 0;
965       for (int j = 0; j < res.counts[dim].length; j++) {
966         if (res.counts[dim][j] != 0) {
967           uniqueCount++;
968         }
969       }
970       res.uniqueCounts[dim] = uniqueCount;
971     }
972 
973     return res;
974   }
975 
  /**
   * Asserts the actual DrillSideways result matches the expected ("gold")
   * hits, scores and facet counts from {@link #slowDrillSidewaysSearch}.
   *
   * @param dimValues     all values per dimension
   * @param s             searcher, used to resolve hit docIDs back to id fields
   * @param expected      gold hits and counts
   * @param actual        result under test
   * @param scores        id -&gt; score from the base query; drill-down must not change scores
   * @param isSortedSetDV whether SortedSetDV facets were used (only then are
   *                      tie-broken labels guaranteed to be in unicode order)
   */
  void verifyEquals(String[][] dimValues, IndexSearcher s, TestFacetResult expected,
                    DrillSidewaysResult actual, Map<String,Float> scores, boolean isSortedSetDV) throws Exception {
    if (VERBOSE) {
      System.out.println("  verify totHits=" + expected.hits.size());
    }
    // Same hits, in the same order, with identical scores:
    assertEquals(expected.hits.size(), actual.hits.totalHits);
    assertEquals(expected.hits.size(), actual.hits.scoreDocs.length);
    for(int i=0;i<expected.hits.size();i++) {
      if (VERBOSE) {
        System.out.println("    hit " + i + " expected=" + expected.hits.get(i).id);
      }
      assertEquals(expected.hits.get(i).id,
                   s.doc(actual.hits.scoreDocs[i].doc).get("id"));
      // Score should be IDENTICAL:
      assertEquals(scores.get(expected.hits.get(i).id), actual.hits.scoreDocs[i].score, 0.0f);
    }

    // Verify facet counts per dimension, with a randomly chosen topN:
    for(int dim=0;dim<expected.counts.length;dim++) {
      int topN = random().nextBoolean() ? dimValues[dim].length : TestUtil.nextInt(random(), 1, dimValues[dim].length);
      FacetResult fr = actual.facets.getTopChildren(topN, "dim"+dim);
      if (VERBOSE) {
        System.out.println("    dim" + dim + " topN=" + topN + " (vs " + dimValues[dim].length + " unique values)");
        System.out.println("      actual");
      }

      int idx = 0;
      Map<String,Integer> actualValues = new HashMap<>();

      // fr is null when no child of this dim had a count:
      if (fr != null) {
        for(LabelAndValue labelValue : fr.labelValues) {
          actualValues.put(labelValue.label, labelValue.value.intValue());
          if (VERBOSE) {
            System.out.println("        " + idx + ": " + new BytesRef(labelValue.label) + ": " + labelValue.value);
            idx++;
          }
        }
        assertEquals("dim=" + dim, expected.uniqueCounts[dim], fr.childCount);
      }

      if (topN < dimValues[dim].length) {
        // Fewer than all values requested: verify the exact sorted top-N slice:
        int[] topNIDs = getTopNOrds(expected.counts[dim], dimValues[dim], topN);
        if (VERBOSE) {
          idx = 0;
          System.out.println("      expected (sorted)");
          for(int i=0;i<topNIDs.length;i++) {
            int expectedOrd = topNIDs[i];
            String value = dimValues[dim][expectedOrd];
            System.out.println("        " + idx + ": " + new BytesRef(value) + ": " + expected.counts[dim][expectedOrd]);
            idx++;
          }
        }
        if (VERBOSE) {
          System.out.println("      topN=" + topN + " expectedTopN=" + topNIDs.length);
        }

        if (fr != null) {
          assertEquals(topNIDs.length, fr.labelValues.length);
        } else {
          assertEquals(0, topNIDs.length);
        }
        for(int i=0;i<topNIDs.length;i++) {
          int expectedOrd = topNIDs[i];
          assertEquals(expected.counts[dim][expectedOrd], fr.labelValues[i].value.intValue());
          if (isSortedSetDV) {
            // Tie-break facet labels are only in unicode
            // order with SortedSetDVFacets:
            assertEquals("value @ idx=" + i, dimValues[dim][expectedOrd], fr.labelValues[i].label);
          }
        }
      } else {
        // topN covers all values: verify counts as an unordered set:

        if (VERBOSE) {
          idx = 0;
          System.out.println("      expected (unsorted)");
          for(int i=0;i<dimValues[dim].length;i++) {
            String value = dimValues[dim][i];
            if (expected.counts[dim][i] != 0) {
              System.out.println("        " + idx + ": " + new BytesRef(value) + ": " + expected.counts[dim][i]);
              idx++;
            } 
          }
        }

        int setCount = 0;
        for(int i=0;i<dimValues[dim].length;i++) {
          String value = dimValues[dim][i];
          if (expected.counts[dim][i] != 0) {
            assertTrue(actualValues.containsKey(value));
            assertEquals(expected.counts[dim][i], actualValues.get(value).intValue());
            setCount++;
          } else {
            assertFalse(actualValues.containsKey(value));
          }
        }
        // No extra values beyond the expected non-zero ones:
        assertEquals(setCount, actualValues.size());
      }
    }
  }
1074 
1075   public void testEmptyIndex() throws Exception {
1076     // LUCENE-5045: make sure DrillSideways works with an empty index
1077     Directory dir = newDirectory();
1078     Directory taxoDir = newDirectory();
1079     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
1080     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
1081     IndexSearcher searcher = newSearcher(writer.getReader());
1082     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
1083 
1084     // Count "Author"
1085     FacetsConfig config = new FacetsConfig();
1086     DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
1087     DrillDownQuery ddq = new DrillDownQuery(config);
1088     ddq.add("Author", "Lisa");
1089     
1090     DrillSidewaysResult r = ds.search(ddq, 10); // this used to fail on IllegalArgEx
1091     assertEquals(0, r.hits.totalHits);
1092 
1093     r = ds.search(ddq, null, null, 10, new Sort(new SortField("foo", SortField.Type.INT)), false, false); // this used to fail on IllegalArgEx
1094     assertEquals(0, r.hits.totalHits);
1095 
1096     writer.close();
1097     IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir);
1098   }
1099 
1100   public void testScorer() throws Exception {
1101     // LUCENE-6001 some scorers, eg ReqExlScorer, can hit NPE if cost is called after nextDoc
1102     Directory dir = newDirectory();
1103     Directory taxoDir = newDirectory();
1104 
1105     // Writes facet ords to a separate directory from the
1106     // main index:
1107     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
1108 
1109     FacetsConfig config = new FacetsConfig();
1110 
1111     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
1112 
1113     Document doc = new Document();
1114     doc.add(newTextField("field", "foo bar", Field.Store.NO));
1115     doc.add(new FacetField("Author", "Bob"));
1116     doc.add(new FacetField("dim", "a"));
1117     writer.addDocument(config.build(taxoWriter, doc));
1118 
1119     // NRT open
1120     IndexSearcher searcher = newSearcher(writer.getReader());
1121 
1122     // NRT open
1123     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
1124 
1125     DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
1126 
1127     BooleanQuery.Builder bq = new BooleanQuery.Builder();
1128     bq.setDisableCoord(true);
1129     bq.add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST);
1130     bq.add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST_NOT);
1131     DrillDownQuery ddq = new DrillDownQuery(config, bq.build());
1132     ddq.add("field", "foo");
1133     ddq.add("author", bq.build());
1134     ddq.add("dim", bq.build());
1135     DrillSidewaysResult r = ds.search(null, ddq, 10);
1136     assertEquals(0, r.hits.totalHits);
1137 
1138     writer.close();
1139     IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
1140   }
1141 }
1142